{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.2"
    },
    "colab": {
      "name": "XLNet.ipynb",
      "provenance": [],
      "include_colab_link": true
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "b5c9e0831bd04798aab699789e6d7fac": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_1736f8c677324c4cb2a0ac8a250b47db",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_dbb3b87c0796460c846b325571baf93b",
              "IPY_MODEL_a186b870fbd843a69f7f663757d8fb47",
              "IPY_MODEL_4d0b7418049d41d39fc49177f5eea0b5"
            ]
          }
        },
        "1736f8c677324c4cb2a0ac8a250b47db": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "dbb3b87c0796460c846b325571baf93b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_63c0c5d36c9f4a84a2117a66a49349bb",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_e4d0fe8fc89549949d0e0cb8676f8b17"
          }
        },
        "a186b870fbd843a69f7f663757d8fb47": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_34eb38f544e64a9d8288d6117a113d71",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 238192,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 238192,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_cbc957547444449e955f3fcca8ebdfdd"
          }
        },
        "4d0b7418049d41d39fc49177f5eea0b5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_222e2c023267442db310032f372d142a",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 238k/238k [00:00&lt;00:00, 679kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_fbcd8b645f484e0190649d332e05aa14"
          }
        },
        "63c0c5d36c9f4a84a2117a66a49349bb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "e4d0fe8fc89549949d0e0cb8676f8b17": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "34eb38f544e64a9d8288d6117a113d71": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "cbc957547444449e955f3fcca8ebdfdd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "222e2c023267442db310032f372d142a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "fbcd8b645f484e0190649d332e05aa14": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "3e6eff18d9c44bad8bd1e4037cb89a66": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_ed410ea442604b838ed945b0fef9de1c",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_a79c248e31cf46bc9b710cddf5cfa169",
              "IPY_MODEL_940168fbea04416e8fa1c18cbea33346",
              "IPY_MODEL_efed45d2c924473886300d0c11ed26c4"
            ]
          }
        },
        "ed410ea442604b838ed945b0fef9de1c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "a79c248e31cf46bc9b710cddf5cfa169": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_8e202649a60c4e3cba4fc8058f047b11",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_a9d6151622b0489c8941a29f85125ca3"
          }
        },
        "940168fbea04416e8fa1c18cbea33346": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_bfcfc8866fea46589ccfabcd19476893",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1351,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1351,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_07b04ca6b05e4c4dbe72f9cf82be6288"
          }
        },
        "efed45d2c924473886300d0c11ed26c4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_b0ad83649c4a4fc18cbd88124ba66e12",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 1.35k/1.35k [00:00&lt;00:00, 46.2kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_bfaf87a8548f4e7c881c5f86917fd8b2"
          }
        },
        "8e202649a60c4e3cba4fc8058f047b11": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "a9d6151622b0489c8941a29f85125ca3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "bfcfc8866fea46589ccfabcd19476893": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "07b04ca6b05e4c4dbe72f9cf82be6288": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "b0ad83649c4a4fc18cbd88124ba66e12": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "bfaf87a8548f4e7c881c5f86917fd8b2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "962e3078668d43c181e3365f38739d27": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_184c6cd7de8848a7b28ab5005ea0d450",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_ba0332fb94a84dab9d35416212ac41d9",
              "IPY_MODEL_1c85af52377c4322959381d57380551c",
              "IPY_MODEL_445d863ecf40449fa3e399addd11cf7e"
            ]
          }
        },
        "184c6cd7de8848a7b28ab5005ea0d450": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "ba0332fb94a84dab9d35416212ac41d9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_909a6c5bdeac4082bda346b34cb9c6e5",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_5811407228584eed8ac62e5849e9d756"
          }
        },
        "1c85af52377c4322959381d57380551c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_91ebebee226a4603843594ba78eebdab",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1637757076,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1637757076,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_627aa7938fd1434283b5b4bc6d393c98"
          }
        },
        "445d863ecf40449fa3e399addd11cf7e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_826c02ab040d45f0aa6037a5a5e27af5",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 1.64G/1.64G [00:32&lt;00:00, 47.2MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_70fa15df20784c959c6ff4bb244b2e24"
          }
        },
        "909a6c5bdeac4082bda346b34cb9c6e5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "5811407228584eed8ac62e5849e9d756": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "91ebebee226a4603843594ba78eebdab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "627aa7938fd1434283b5b4bc6d393c98": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "826c02ab040d45f0aa6037a5a5e27af5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "70fa15df20784c959c6ff4bb244b2e24": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/agemagician/ProtTrans/blob/master/Embedding/PyTorch/Advanced/ProtXLNet.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "WZbZ-X1nNX5y"
      },
      "source": [
        "<h3> Extracting protein sequences' features using ProtXLNet pretrained-model <h3>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2dP1skIiNX51"
      },
      "source": [
        "<b>1. Load necessry libraries including huggingface transformers<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VqLdcEsgNgmu",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "0c7f8a91-95bc-454d-bd42-7acce91a74b6"
      },
      "source": [
        "!pip install -q transformers sentencePiece"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\u001b[K     |████████████████████████████████| 2.6 MB 6.7 MB/s \n",
            "\u001b[K     |████████████████████████████████| 1.2 MB 55.7 MB/s \n",
            "\u001b[K     |████████████████████████████████| 636 kB 53.2 MB/s \n",
            "\u001b[K     |████████████████████████████████| 895 kB 34.8 MB/s \n",
            "\u001b[K     |████████████████████████████████| 3.3 MB 44.1 MB/s \n",
            "\u001b[?25h"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xD5QbnlPNX53"
      },
      "source": [
        "import torch\n",
        "from transformers import XLNetModel, XLNetTokenizer\n",
        "import re\n",
        "import os\n",
        "import requests\n",
        "from tqdm.auto import tqdm"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "N8P8PuaPNX6P"
      },
      "source": [
        "<b>2. Load the vocabulary and ProtXLNet Model<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rCciR4OuNX6Q",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 81,
          "referenced_widgets": [
            "b5c9e0831bd04798aab699789e6d7fac",
            "1736f8c677324c4cb2a0ac8a250b47db",
            "dbb3b87c0796460c846b325571baf93b",
            "a186b870fbd843a69f7f663757d8fb47",
            "4d0b7418049d41d39fc49177f5eea0b5",
            "63c0c5d36c9f4a84a2117a66a49349bb",
            "e4d0fe8fc89549949d0e0cb8676f8b17",
            "34eb38f544e64a9d8288d6117a113d71",
            "cbc957547444449e955f3fcca8ebdfdd",
            "222e2c023267442db310032f372d142a",
            "fbcd8b645f484e0190649d332e05aa14",
            "3e6eff18d9c44bad8bd1e4037cb89a66",
            "ed410ea442604b838ed945b0fef9de1c",
            "a79c248e31cf46bc9b710cddf5cfa169",
            "940168fbea04416e8fa1c18cbea33346",
            "efed45d2c924473886300d0c11ed26c4",
            "8e202649a60c4e3cba4fc8058f047b11",
            "a9d6151622b0489c8941a29f85125ca3",
            "bfcfc8866fea46589ccfabcd19476893",
            "07b04ca6b05e4c4dbe72f9cf82be6288",
            "b0ad83649c4a4fc18cbd88124ba66e12",
            "bfaf87a8548f4e7c881c5f86917fd8b2"
          ]
        },
        "outputId": "80a9e055-394f-4a52-820e-16d669dd954a"
      },
      "source": [
        "tokenizer = XLNetTokenizer.from_pretrained(\"Rostlab/prot_xlnet\", do_lower_case=False)"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "b5c9e0831bd04798aab699789e6d7fac",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/238k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "3e6eff18d9c44bad8bd1e4037cb89a66",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/1.35k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Yq-cm1HvNX6U"
      },
      "source": [
        "xlnet_men_len = 512"
      ],
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "GK96KkaPNX6Y",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 100,
          "referenced_widgets": [
            "962e3078668d43c181e3365f38739d27",
            "184c6cd7de8848a7b28ab5005ea0d450",
            "ba0332fb94a84dab9d35416212ac41d9",
            "1c85af52377c4322959381d57380551c",
            "445d863ecf40449fa3e399addd11cf7e",
            "909a6c5bdeac4082bda346b34cb9c6e5",
            "5811407228584eed8ac62e5849e9d756",
            "91ebebee226a4603843594ba78eebdab",
            "627aa7938fd1434283b5b4bc6d393c98",
            "826c02ab040d45f0aa6037a5a5e27af5",
            "70fa15df20784c959c6ff4bb244b2e24"
          ]
        },
        "outputId": "4fbeb4da-5c72-4308-e38d-ceb3936bb809"
      },
      "source": [
        "model = XLNetModel.from_pretrained(\"Rostlab/prot_xlnet\",mem_len=xlnet_men_len)"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "962e3078668d43c181e3365f38739d27",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/1.64G [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "text": [
            "Some weights of the model checkpoint at Rostlab/prot_xlnet were not used when initializing XLNetModel: ['lm_loss.weight', 'lm_loss.bias']\n",
            "- This IS expected if you are initializing XLNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
            "- This IS NOT expected if you are initializing XLNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4DALWq-uNX6b"
      },
      "source": [
        "<b>3. Load the model into the GPU if avilabile and switch to inference mode<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "aJ9x1UkCNX6c"
      },
      "source": [
        "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')"
      ],
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vg-Xq9JINX6e"
      },
      "source": [
        "model = model.to(device)\n",
        "model = model.eval()"
      ],
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LHzvO2rWNX6f"
      },
      "source": [
        "<b>4. Create or load sequences and map rarely occured amino acids (U,Z,O,B) to (unk)<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_yO9KaaCNX6f"
      },
      "source": [
        "sequences_Example = [\"A E T C Z A O\",\"S K T Z P\"]"
      ],
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4PUKBD7dNX6h"
      },
      "source": [
        "sequences_Example = [re.sub(r\"[UZOBX]\", \"<unk>\", sequence) for sequence in sequences_Example]"
      ],
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "xj10emItNX6j"
      },
      "source": [
        "<b>5. Tokenize, encode sequences and load it into the GPU if possibile<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Sr5TAXIsNX6k",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c04d9911-ae84-4e7f-f094-7269ac30cce7"
      },
      "source": [
        "ids = tokenizer.batch_encode_plus(sequences_Example, add_special_tokens=True, pad_to_max_length=True)"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py:2190: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
            "  FutureWarning,\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "74ktq66hNX6l"
      },
      "source": [
        "input_ids = torch.tensor(ids['input_ids']).to(device)\n",
        "attention_mask = torch.tensor(ids['attention_mask']).to(device)"
      ],
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "onNUfYXjNX6t"
      },
      "source": [
        "<b>6. Extracting sequences' features and load it into the CPU if needed<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "07YVm3IjNX6u"
      },
      "source": [
        "with torch.no_grad():\n",
        "    output = model(input_ids=input_ids,attention_mask=attention_mask,mems=None)\n",
        "    embedding = output.last_hidden_state\n",
        "    mems = output.mems"
      ],
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VGu2brubNX6x"
      },
      "source": [
        "embedding = embedding.cpu().numpy()"
      ],
      "execution_count": 13,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LVRB9g2kNX60"
      },
      "source": [
        "<b>7. Remove padding ([PAD]) and special tokens ([CLS],[SEP]) that is added by ProtXLNet model<b>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PSeqr4YdNX60"
      },
      "source": [
        "features = [] \n",
        "for seq_num in range(len(embedding)):\n",
        "    seq_len = (attention_mask[seq_num] == 1).sum()\n",
        "    padded_seq_len = len(attention_mask[seq_num])\n",
        "    seq_emd = embedding[seq_num][padded_seq_len-seq_len:padded_seq_len-2]\n",
        "    features.append(seq_emd)"
      ],
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4h3DOjYZNX62",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d9c6ed66-adab-4af3-83c7-0c08d9a0147f"
      },
      "source": [
        "print(features)"
      ],
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[array([[ 0.4874513 , -0.7708785 ,  0.9900176 , ..., -0.37356144,\n",
            "        -1.0589752 ,  0.9559978 ],\n",
            "       [ 0.21325906, -0.54788995,  0.611549  , ..., -0.05471378,\n",
            "        -0.8787889 ,  0.24645127],\n",
            "       [ 0.37891322, -0.63965786,  0.67224425, ..., -0.14891209,\n",
            "        -0.6769571 ,  0.34598204],\n",
            "       ...,\n",
            "       [ 0.09265018, -0.68101174,  0.5181867 , ..., -0.32756284,\n",
            "        -0.56731117, -0.1644125 ],\n",
            "       [-0.08533233, -0.7438228 ,  0.29890773, ..., -0.24376427,\n",
            "        -0.1115306 , -0.7260989 ],\n",
            "       [-0.48225865, -0.8381691 ,  0.08214396, ..., -0.25196335,\n",
            "        -0.03577377, -0.5348947 ]], dtype=float32), array([[ 1.040305  , -0.95049226,  0.33534572, ..., -0.23747356,\n",
            "        -0.2755075 ,  0.4794891 ],\n",
            "       [ 0.50853604, -0.95084566,  1.0235122 , ..., -0.05893029,\n",
            "        -0.97527987,  0.08713222],\n",
            "       [ 0.65626603, -0.7540609 ,  0.43234858, ...,  0.367075  ,\n",
            "        -0.70632464, -0.55534136],\n",
            "       [ 0.5903382 ,  0.14152074,  0.295789  , ...,  0.2060117 ,\n",
            "        -0.8735834 ,  0.8067949 ],\n",
            "       [ 0.41400748,  0.6131701 ,  0.04792275, ...,  0.18511426,\n",
            "        -0.34098494, -0.41199982]], dtype=float32)]\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}